#load integrated object
load(file = "forebrain_all_final.RObj")
##' Create a pseudoBulk from a single-cell expression matrix
##'
##' Create a pseudoBulk from a single-cell expression matrix
##'
##' @param seurat.obj Integrated seurat object
##' @param clusters Vector or factor along the cells giving cluster memberships.
##' Must have length(clusters) == ncol(n). If a factor then unused levels will be
##' dropped; if not a factor then will be converted to one.
##' @param rowAggregator Default rowSum. Or some other function that accepts a matrix
##' and returns a numeric vector along the rows giving some "bulk summary" of the expression value.
##' @return Matrix of features x clusters
##' @author Shristi Pandey
##' @export
##'
pseudoBulk <- function(seurat.obj, clusters, rowAggregator = rowSums) {
#n <- as.matrix(GetAssayData(seurat.obj, slot = 'counts'))
n <- GetAssayData(seurat.obj, slot = 'counts')
stopifnot(length(clusters) == ncol(n))
## drop unused levels if it IS a factor, otherwise make it a factor
clusters <- factor(clusters)
names(clusterLevels) <- clusterLevels <- levels(clusters)
psb <- sapply(clusterLevels, function(cl) {
#.richMessage("... collapsing ", cl, caller = "pseudoBulk")
i <- clusters == cl
vec <- rowAggregator(n[,i, drop = FALSE])
stopifnot(names(vec) == rownames(n)) ## double check we are preserving rownames
vec
})
return(psb)
}
##' Given a seurat object, returns a set up summarized object to run pseudobulk with voom
##'
##' @param seurat.obj Seurat Object
##' @param treatment string representing the genotype/timepoint or other treatment type
##' @param sample_field String representing the sample metadata
##' @param celltype_field String representing the cluster field in the seurat object
##' @param study_field String representing the study field if the object represents an integration of multiple studies
##' @param cluster_prop Proportion of the cluster in the total dataset expressed in percent. For creating different psbs for different cluster sizes. To modulate the minimum
##' @author Shristi Pandey
##' @return Summarized experiment object with appropriate
##'
setup_se_psb <- function(seurat.obj, treatment_field=NULL, sample_field = 'sampleID', celltype_field = NULL, study_field = 'orig.ident'){
library(Seurat)
library(dplyr)
library(edgeR)
library(SummarizedExperiment)
DefaultAssay(seurat.obj) <- 'RNA' # important when you are performing experiment with integrated object
if (is.null(treatment_field)){
stop('No treatment field provided')
}
treatment = unlist(seurat.obj[[treatment_field]])
if (is.null(sample_field)){
stop('No sample field provided')
}
samples = unlist(seurat.obj[[sample_field]])
if (!is.null(celltype_field)){
celltype = unlist(seurat.obj[[celltype_field]])
}
if (is.null(celltype_field)){
pseudobulkID <- paste0(treatment,'_',samples)
} else {
pseudobulkID <- paste0(treatment,'_',samples, '_', celltype)
}
seurat.obj$pseudobulkID <- pseudobulkID
psb_clusters <- factor(seurat.obj$pseudobulkID)
numcells <- table(seurat.obj$pseudobulkID)
#check order of the metadata to pseudobulk order
psb <- pseudoBulk(seurat.obj, psb_clusters, rowAggregator = rowSums)
if (is.null(celltype_field)) {
col_data <- data.frame(seurat.obj$pseudobulkID, seurat.obj[[treatment_field]], seurat.obj[[sample_field]], seurat.obj[[study_field]])
colnames(col_data) <- c('pseudobulkID', 'treatment', 'batch', 'study')
} else{
col_data <- data.frame(seurat.obj$pseudobulkID, seurat.obj[[treatment_field]], seurat.obj[[sample_field]], seurat.obj[[celltype_field]], seurat.obj[[study_field]])
colnames(col_data) <- c('pseudobulkID', 'treatment', 'batch', 'celltype', 'study')
}
col_data <- unique(col_data)
rownames(col_data) <- col_data$pseudobulkID
col_data <- col_data[colnames(psb),]
col_data$nCells <- numcells[rownames(col_data)]
if(!all(rownames(col_data) == colnames(psb))) {
stop('Rownames of coldata do not match with the colnames of the assay matrix. Check')
}
norm_counts <- cpm(psb, log=FALSE)
norm_counts <- log2(norm_counts+1)
se <- SummarizedExperiment(assays = list(counts = psb, normalized = norm_counts), colData = col_data)
return (se)
}
##' Sets up and runs voom
##'
##' @param se Summarized experiment
##' @param groups slot containing the groups that you would like to compare
##' @param comparisons comparisons that are of interest; used to create contrasts
##' @return results for all comparisons listed in comparisons
##' @author Shristi Pandey
##' @export
##'
setup_and_runVoom <- function(se = NULL, groups = NULL, comparisons = NULL){
library(edgeR)
library(dplyr)
d0 <- DGEList(assay(se, 1))
d0$genes <- data.frame(origin=seq_len(nrow(se1)))
d0 <- calcNormFactors(d0)
meta <- colData(se) %>% as.data.frame()
data <- subset(meta, select = c('treatment'))
#extract out the treatment col
design <- model.matrix(~0+treatment, data = data)
filtered <- filterByExpr(d0, design=design, large.n=0)
summary(filtered)
d0 <- d0[filtered,]
d0 <- calcNormFactors(d0)
head(d0$samples, 10)
library(limma)
v <- voomWithQualityWeights(d0, design=design, plot=TRUE)
fit <- lmFit(v)
fit <- eBayes(fit, robust=TRUE)
plotSA(fit)
summary(fit$df.prior)
all_results <- List()
#create contrasts for the first comparison:
con <- integer(ncol(design))
names(con) <- colnames(design)
con[c("treatmentAdult", "treatment15dpf")] <- c(1L, -1L)
fit2 <- contrasts.fit(fit, contrasts = con)
fit2 <- eBayes(fit2, robust=TRUE)
res <- topTable(fit2, n=Inf, sort.by="none")
head(res[order(res$adj.P.Val),])
summary(decideTests(fit2))
summary(res$adj.P.Val <= 0.05)
plotMD(fit2, status=decideTests(fit2), hl.cex=0.5)
volcanoplot(fit2)
res$Comparison <- "Difference between `Adult` vs `15dpf`"
all_results[["Difference between `Adult` vs `15dpf`"]] <- res
#create contrasts for the Adult vs. 6dpf comparison and shove it to all_results:
con <- integer(ncol(design))
names(con) <- colnames(design)
con[c("treatmentAdult", "treatment6dpf")] <- c(1L, -1L)
fit2 <- contrasts.fit(fit, contrasts = con)
fit2 <- eBayes(fit2, robust=TRUE)
res <- topTable(fit2, n=Inf, sort.by="none")
head(res[order(res$adj.P.Val),])
summary(decideTests(fit2))
summary(res$adj.P.Val <= 0.05)
plotMD(fit2, status=decideTests(fit2), hl.cex=0.5)
volcanoplot(fit2)
res$Comparison <- 'Difference between Adult vs. 6dpf'
all_results[["Difference between `Adult` vs `6dpf`"]] <- res
#create contrasts for the 15dpf vs. 6dpf comparison and shove it to all_results:
con <- integer(ncol(design))
names(con) <- colnames(design)
con[c("treatment15dpf", "treatment6dpf")] <- c(1L, -1L)
fit2 <- contrasts.fit(fit, contrasts = con)
fit2 <- eBayes(fit2, robust=TRUE)
res <- topTable(fit2, n=Inf, sort.by="none")
head(res[order(res$adj.P.Val),])
summary(decideTests(fit2))
summary(res$adj.P.Val <= 0.05)
plotMD(fit2, status=decideTests(fit2), hl.cex=0.5)
volcanoplot(fit2)
res$Comparison <- "Difference between `15dpf` vs `6dpf`"
all_results[["Difference between `15dpf` vs `6dpf`"]] <- res
return(all_results)
}
forebrain.integrated$study <- 'sp'
se <- setup_se_psb(seurat.obj=forebrain.integrated, treatment_field='age', celltype_field='clusterInterp2', sample_field = 'batch', study_field = 'study')
se <- se[, se$nCells > 50]
#not including Pallium01 and subpallium02 because there are 0 samples at 6 and 15dpf
se <- se[, se$celltype %in% c('Pallium_02', 'Pallium_03', 'Pallium_04', 'Pallium_05', 'Pallium_06', 'Pallium_07', 'Pallium_08', 'Pallium_09',
'Subpallium_02', 'Subpallium_03', 'Subpallium_04', 'Subpallium_05', 'Subpallium_06', 'Subpallium_07', 'Subpallium_08')]
#create results table for all comparisons
cols <- c("genes","origin", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", 'Comparisons', 'Celltype')
adult_vs_15dpf <- setNames(data.frame(matrix(ncol = 10, nrow = 0)), c(cols))
adult_vs_6dpf <- setNames(data.frame(matrix(ncol = 10, nrow = 0)), c(cols))
a15dpf_vs_6dpf <- setNames(data.frame(matrix(ncol = 10, nrow = 0)), c(cols))
for (elem in unique(se$celltype)){
se1 <- se[, se$celltype == elem]
all_results <- setup_and_runVoom(se1,
groups = 'treatment',
comparisons = list(c('Adult', '15dpf'), c('Adult', '6dpf'), c('15dpf', '6dpf')))
b <- as.data.frame(subset(all_results[[1]], abs(logFC) >= log2(2) & adj.P.Val <= 0.05))
c <- as.data.frame(subset(all_results[[2]], abs(logFC) >= log2(2) & adj.P.Val <= 0.05))
d <- as.data.frame(subset(all_results[[3]], abs(logFC) >= log2(2) & adj.P.Val <= 0.05))
b$genes <- rownames(b)
b$Comparisons <- 'Adult_vs_15dpf'
b$Celltype <- elem
adult_vs_15dpf <- merge(adult_vs_15dpf, b, all = T)
c$genes <- rownames(c)
c$Comparisons <- 'Adult_vs_6dpf'
c$Celltype <- elem
adult_vs_6dpf <- merge(adult_vs_6dpf, c, all = T)
d$genes <- rownames(d)
d$Comparisons <- '15_vs_6dpf'
d$Celltype <- elem
a15dpf_vs_6dpf <- merge(a15dpf_vs_6dpf, d, all = T)
}write.csv(adult_vs_15dpf, file = '~/adult_Vs_15dpf.csv')
write.csv(adult_vs_6dpf, file = '~/adult_vs_6dpf.csv')
write.csv(a15dpf_vs_6dpf, file = '~/a15dpf_vs_6dpf.csv')
## create upset plots
library(UpSetR)
a15dpf_vs_6dpf <- read.csv("~/a15dpf_vs_6dpf.csv", row.names = 1)
adult_vs_15dpf <- read.csv("~/adult_Vs_15dpf.csv", row.names = 1)
adult_vs_6dpf <- read.csv("~/adult_vs_6dpf.csv", row.names = 1)
adult_vs_15dpf <- adult_vs_15dpf[adult_vs_15dpf$logFC >0, ]
a15dpf_vs_6dpf <- a15dpf_vs_6dpf[a15dpf_vs_6dpf$logFC >0, ]
adult_vs_6dpf <- adult_vs_6dpf[adult_vs_6dpf$logFC >0, ]
#adult vs 6
dat_list = split(adult_vs_6dpf, adult_vs_6dpf$Celltype)
upsetlist <- vector(mode = "list", length = length(dat_list))
names(upsetlist) <- names(dat_list)
for (i in 1:length(dat_list)){
genes <- c()
#print(names(dat_list[i]))
genes <- dat_list[[i]]$genes
#print(genes)
upsetlist[[names(dat_list[i])]] = genes
}
upset(fromList(upsetlist), order.by = "freq", nsets = length(upsetlist))#6 v 15dpf
dat_list = split(a15dpf_vs_6dpf, a15dpf_vs_6dpf$Celltype)
upsetlist <- vector(mode = "list", length = length(dat_list))
names(upsetlist) <- names(dat_list)
for (i in 1:length(dat_list)){
genes <- c()
#print(names(dat_list[i]))
genes <- dat_list[[i]]$genes
#print(genes)
upsetlist[[names(dat_list[i])]] = genes
}
upset(fromList(upsetlist), order.by = "freq", nsets = length(upsetlist))#adult vs 15dpf
dat_list = split(adult_vs_15dpf, adult_vs_15dpf$Celltype)
upsetlist <- vector(mode = "list", length = length(dat_list))
names(upsetlist) <- names(dat_list)
for (i in 1:length(dat_list)){
genes <- c()
#print(names(dat_list[i]))
genes <- dat_list[[i]]$genes
#print(genes)
upsetlist[[names(dat_list[i])]] = genes
}
upset(fromList(upsetlist), order.by = "freq", nsets = length(upsetlist))Packages and versions necessary to reproduce the results in this report.
sessionInfo()## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
##
## Matrix products: default
## BLAS/LAPACK: /data/rc/apps/rc/software/FlexiBLAS/3.0.4-GCC-10.3.0/lib64/libflexiblas.so.3.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] UpSetR_1.4.0 SummarizedExperiment_1.28.0
## [3] Biobase_2.58.0 GenomicRanges_1.50.2
## [5] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [7] S4Vectors_0.36.2 BiocGenerics_0.44.0
## [9] MatrixGenerics_1.10.0 matrixStats_0.63.0
## [11] edgeR_3.40.2 limma_3.54.2
## [13] dplyr_1.1.1 statmod_1.5.0
## [15] SeuratObject_4.1.3 Seurat_4.3.0
## [17] knitr_1.42 tinytex_0.44
## [19] rmarkdown_2.21
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.8 igraph_1.4.1 lazyeval_0.2.2
## [4] sp_1.6-0 splines_4.2.0 listenv_0.9.0
## [7] scattermore_0.8 ggplot2_3.4.1 digest_0.6.31
## [10] htmltools_0.5.5 fansi_1.0.4 magrittr_2.0.3
## [13] tensor_1.5 cluster_2.1.4 ROCR_1.0-11
## [16] globals_0.16.2 spatstat.sparse_3.0-1 colorspace_2.1-0
## [19] ggrepel_0.9.3 xfun_0.38 RCurl_1.98-1.12
## [22] jsonlite_1.8.4 progressr_0.13.0 spatstat.data_3.0-1
## [25] survival_3.5-5 zoo_1.8-11 glue_1.6.2
## [28] polyclip_1.10-4 gtable_0.3.3 zlibbioc_1.44.0
## [31] XVector_0.38.0 leiden_0.4.3 DelayedArray_0.24.0
## [34] future.apply_1.10.0 abind_1.4-5 scales_1.2.1
## [37] DBI_1.1.3 spatstat.random_3.1-4 miniUI_0.1.1.1
## [40] Rcpp_1.0.10 viridisLite_0.4.1 xtable_1.8-4
## [43] reticulate_1.28 htmlwidgets_1.6.2 httr_1.4.5
## [46] RColorBrewer_1.1-3 ellipsis_0.3.2 ica_1.0-3
## [49] farver_2.1.1 pkgconfig_2.0.3 sass_0.4.5
## [52] uwot_0.1.14 deldir_1.0-6 locfit_1.5-9.7
## [55] utf8_1.2.3 tidyselect_1.2.0 labeling_0.4.2
## [58] rlang_1.1.0 reshape2_1.4.4 later_1.3.0
## [61] munsell_0.5.0 tools_4.2.0 cachem_1.0.7
## [64] cli_3.6.1 generics_0.1.3 ggridges_0.5.4
## [67] evaluate_0.20 stringr_1.5.0 fastmap_1.1.1
## [70] yaml_2.3.7 goftest_1.2-3 fitdistrplus_1.1-8
## [73] purrr_1.0.1 RANN_2.6.1 pbapply_1.7-0
## [76] future_1.32.0 nlme_3.1-162 mime_0.12
## [79] compiler_4.2.0 rstudioapi_0.14 plotly_4.10.1
## [82] png_0.1-8 spatstat.utils_3.0-2 tibble_3.2.1
## [85] bslib_0.4.2 stringi_1.7.12 highr_0.10
## [88] lattice_0.20-45 Matrix_1.5-3 vctrs_0.6.1
## [91] pillar_1.9.0 lifecycle_1.0.3 spatstat.geom_3.1-0
## [94] lmtest_0.9-40 jquerylib_0.1.4 RcppAnnoy_0.0.20
## [97] data.table_1.14.8 cowplot_1.1.1 bitops_1.0-7
## [100] irlba_2.3.5.1 httpuv_1.6.9 patchwork_1.1.2
## [103] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-20
## [106] gridExtra_2.3 parallelly_1.35.0 codetools_0.2-19
## [109] MASS_7.3-58.3 withr_2.5.0 sctransform_0.3.5
## [112] GenomeInfoDbData_1.2.9 parallel_4.2.0 grid_4.2.0
## [115] tidyr_1.3.0 Rtsne_0.16 spatstat.explore_3.1-0
## [118] shiny_1.7.4